# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Code generated by the Google Gen AI SDK generator DO NOT EDIT.

import json
import logging
from typing import Any, Callable, Optional, Union
from urllib.parse import urlencode
import uuid

from google.genai import _api_module
from google.genai import _common
from google.genai import types as genai_types
from google.genai._common import get_value_by_path as getv
from google.genai._common import set_value_by_path as setv
import pandas as pd

from . import _evals_common
from . import _evals_utils
from . import _transformers as t
from . import types

try:
    from google.adk.agents import LlmAgent
except ImportError:
    LlmAgent = None  # type: ignore[assignment]


logger = logging.getLogger("vertexai_genai.evals")


def _CreateEvaluationItemParameters_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["evaluation_item_type"]) is not None:
        setv(
            to_object,
            ["evaluationItemType"],
            getv(from_object, ["evaluation_item_type"]),
        )

    if getv(from_object, ["gcs_uri"]) is not None:
        setv(to_object, ["gcsUri"], getv(from_object, ["gcs_uri"]))

    if getv(from_object, ["display_name"]) is not None:
        setv(to_object, ["displayName"], getv(from_object, ["display_name"]))

    if getv(from_object, ["config"]) is not None:
        setv(to_object, ["config"], getv(from_object, ["config"]))

    return to_object


def _CreateEvaluationRunParameters_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["name"]) is not None:
        setv(to_object, ["name"], getv(from_object, ["name"]))

    if getv(from_object, ["display_name"]) is not None:
        setv(to_object, ["displayName"], getv(from_object, ["display_name"]))

    if getv(from_object, ["data_source"]) is not None:
        setv(to_object, ["dataSource"], getv(from_object, ["data_source"]))

    if getv(from_object, ["evaluation_config"]) is not None:
        setv(to_object, ["evaluationConfig"], getv(from_object, ["evaluation_config"]))

    if getv(from_object, ["labels"]) is not None:
        setv(to_object, ["labels"], getv(from_object, ["labels"]))

    if getv(from_object, ["inference_configs"]) is not None:
        setv(to_object, ["inferenceConfigs"], getv(from_object, ["inference_configs"]))

    if getv(from_object, ["config"]) is not None:
        setv(to_object, ["config"], getv(from_object, ["config"]))

    return to_object


def _CreateEvaluationSetParameters_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["evaluation_items"]) is not None:
        setv(to_object, ["evaluationItems"], getv(from_object, ["evaluation_items"]))

    if getv(from_object, ["display_name"]) is not None:
        setv(to_object, ["displayName"], getv(from_object, ["display_name"]))

    if getv(from_object, ["config"]) is not None:
        setv(to_object, ["config"], getv(from_object, ["config"]))

    return to_object


def _EvaluateInstancesRequestParameters_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["bleu_input"]) is not None:
        setv(to_object, ["bleuInput"], getv(from_object, ["bleu_input"]))

    if getv(from_object, ["exact_match_input"]) is not None:
        setv(to_object, ["exactMatchInput"], getv(from_object, ["exact_match_input"]))

    if getv(from_object, ["rouge_input"]) is not None:
        setv(to_object, ["rougeInput"], getv(from_object, ["rouge_input"]))

    if getv(from_object, ["pointwise_metric_input"]) is not None:
        setv(
            to_object,
            ["pointwiseMetricInput"],
            getv(from_object, ["pointwise_metric_input"]),
        )

    if getv(from_object, ["pairwise_metric_input"]) is not None:
        setv(
            to_object,
            ["pairwiseMetricInput"],
            getv(from_object, ["pairwise_metric_input"]),
        )

    if getv(from_object, ["tool_call_valid_input"]) is not None:
        setv(
            to_object,
            ["toolCallValidInput"],
            getv(from_object, ["tool_call_valid_input"]),
        )

    if getv(from_object, ["tool_name_match_input"]) is not None:
        setv(
            to_object,
            ["toolNameMatchInput"],
            getv(from_object, ["tool_name_match_input"]),
        )

    if getv(from_object, ["tool_parameter_key_match_input"]) is not None:
        setv(
            to_object,
            ["toolParameterKeyMatchInput"],
            getv(from_object, ["tool_parameter_key_match_input"]),
        )

    if getv(from_object, ["tool_parameter_kv_match_input"]) is not None:
        setv(
            to_object,
            ["toolParameterKvMatchInput"],
            getv(from_object, ["tool_parameter_kv_match_input"]),
        )

    if getv(from_object, ["rubric_based_metric_input"]) is not None:
        setv(
            to_object,
            ["rubricBasedMetricInput"],
            _RubricBasedMetricInput_to_vertex(
                getv(from_object, ["rubric_based_metric_input"]), to_object
            ),
        )

    if getv(from_object, ["autorater_config"]) is not None:
        setv(to_object, ["autoraterConfig"], getv(from_object, ["autorater_config"]))

    if getv(from_object, ["metrics"]) is not None:
        setv(
            to_object,
            ["metrics"],
            [item for item in t.t_metrics(getv(from_object, ["metrics"]))],
        )

    if getv(from_object, ["instance"]) is not None:
        setv(to_object, ["instance"], getv(from_object, ["instance"]))

    if getv(from_object, ["config"]) is not None:
        setv(to_object, ["config"], getv(from_object, ["config"]))

    return to_object


def _EvaluationRun_from_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["name"]) is not None:
        setv(to_object, ["name"], getv(from_object, ["name"]))

    if getv(from_object, ["displayName"]) is not None:
        setv(to_object, ["display_name"], getv(from_object, ["displayName"]))

    if getv(from_object, ["metadata"]) is not None:
        setv(to_object, ["metadata"], getv(from_object, ["metadata"]))

    if getv(from_object, ["createTime"]) is not None:
        setv(to_object, ["create_time"], getv(from_object, ["createTime"]))

    if getv(from_object, ["completionTime"]) is not None:
        setv(to_object, ["completion_time"], getv(from_object, ["completionTime"]))

    if getv(from_object, ["state"]) is not None:
        setv(to_object, ["state"], getv(from_object, ["state"]))

    if getv(from_object, ["evaluationSetSnapshot"]) is not None:
        setv(
            to_object,
            ["evaluation_set_snapshot"],
            getv(from_object, ["evaluationSetSnapshot"]),
        )

    if getv(from_object, ["error"]) is not None:
        setv(to_object, ["error"], getv(from_object, ["error"]))

    if getv(from_object, ["dataSource"]) is not None:
        setv(to_object, ["data_source"], getv(from_object, ["dataSource"]))

    if getv(from_object, ["evaluationResults"]) is not None:
        setv(
            to_object,
            ["evaluation_run_results"],
            getv(from_object, ["evaluationResults"]),
        )

    if getv(from_object, ["evaluationConfig"]) is not None:
        setv(to_object, ["evaluation_config"], getv(from_object, ["evaluationConfig"]))

    if getv(from_object, ["inferenceConfigs"]) is not None:
        setv(to_object, ["inference_configs"], getv(from_object, ["inferenceConfigs"]))

    if getv(from_object, ["labels"]) is not None:
        setv(to_object, ["labels"], getv(from_object, ["labels"]))

    return to_object


def _GenerateInstanceRubricsRequest_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["contents"]) is not None:
        setv(to_object, ["contents"], getv(from_object, ["contents"]))

    if getv(from_object, ["predefined_rubric_generation_spec"]) is not None:
        setv(
            to_object,
            ["predefinedRubricGenerationSpec"],
            getv(from_object, ["predefined_rubric_generation_spec"]),
        )

    if getv(from_object, ["rubric_generation_spec"]) is not None:
        setv(
            to_object,
            ["rubricGenerationSpec"],
            _RubricGenerationSpec_to_vertex(
                getv(from_object, ["rubric_generation_spec"]), to_object
            ),
        )

    if getv(from_object, ["config"]) is not None:
        setv(to_object, ["config"], getv(from_object, ["config"]))

    return to_object


def _GetEvaluationItemParameters_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["name"]) is not None:
        setv(to_object, ["_url", "name"], getv(from_object, ["name"]))

    if getv(from_object, ["config"]) is not None:
        setv(to_object, ["config"], getv(from_object, ["config"]))

    return to_object


def _GetEvaluationRunParameters_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["name"]) is not None:
        setv(to_object, ["_url", "name"], getv(from_object, ["name"]))

    if getv(from_object, ["config"]) is not None:
        setv(to_object, ["config"], getv(from_object, ["config"]))

    return to_object


def _GetEvaluationSetParameters_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["name"]) is not None:
        setv(to_object, ["_url", "name"], getv(from_object, ["name"]))

    if getv(from_object, ["config"]) is not None:
        setv(to_object, ["config"], getv(from_object, ["config"]))

    return to_object


def _RubricBasedMetricInput_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["metric_spec"]) is not None:
        setv(
            to_object,
            ["metricSpec"],
            _RubricBasedMetricSpec_to_vertex(
                getv(from_object, ["metric_spec"]), to_object
            ),
        )

    if getv(from_object, ["instance"]) is not None:
        setv(to_object, ["instance"], getv(from_object, ["instance"]))

    return to_object


def _RubricBasedMetricSpec_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["metric_prompt_template"]) is not None:
        setv(
            to_object,
            ["metricPromptTemplate"],
            getv(from_object, ["metric_prompt_template"]),
        )

    if getv(from_object, ["judge_autorater_config"]) is not None:
        setv(
            to_object,
            ["judgeAutoraterConfig"],
            getv(from_object, ["judge_autorater_config"]),
        )

    if getv(from_object, ["inline_rubrics"]) is not None:
        setv(
            to_object,
            ["inline_rubrics", "rubrics"],
            getv(from_object, ["inline_rubrics"]),
        )

    if getv(from_object, ["rubric_group_key"]) is not None:
        setv(to_object, ["rubricGroupKey"], getv(from_object, ["rubric_group_key"]))

    if getv(from_object, ["rubric_generation_spec"]) is not None:
        setv(
            to_object,
            ["rubricGenerationSpec"],
            _RubricGenerationSpec_to_vertex(
                getv(from_object, ["rubric_generation_spec"]), to_object
            ),
        )

    return to_object


def _RubricGenerationSpec_to_vertex(
    from_object: Union[dict[str, Any], object],
    parent_object: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    to_object: dict[str, Any] = {}
    if getv(from_object, ["prompt_template"]) is not None:
        setv(to_object, ["promptTemplate"], getv(from_object, ["prompt_template"]))

    if getv(from_object, ["generator_model_config"]) is not None:
        setv(to_object, ["model_config"], getv(from_object, ["generator_model_config"]))

    if getv(from_object, ["rubric_content_type"]) is not None:
        setv(
            to_object, ["rubricContentType"], getv(from_object, ["rubric_content_type"])
        )

    if getv(from_object, ["rubric_type_ontology"]) is not None:
        setv(
            to_object,
            ["rubricTypeOntology"],
            getv(from_object, ["rubric_type_ontology"]),
        )

    return to_object


class Evals(_api_module.BaseModule):
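    """Evaluation methods for the Vertex AI GenAI SDK client."""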

    def _create_evaluation_item(
        self,
        *,
        evaluation_item_type: str,
        gcs_uri: str,
        display_name: Optional[str] = None,
        config: Optional[types.CreateEvaluationItemConfigOrDict] = None,
    ) -> types.EvaluationItem:
        """
        Creates an EvaluationItem.
        """

        parameter_model = types._CreateEvaluationItemParameters(
            evaluation_item_type=evaluation_item_type,
            gcs_uri=gcs_uri,
            display_name=display_name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _CreateEvaluationItemParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationItems".format_map(request_url_dict)
            else:
                path = "evaluationItems"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("post", path, request_dict, http_options)

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluationItem._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    def _create_evaluation_run(
        self,
        *,
        name: Optional[str] = None,
        display_name: Optional[str] = None,
        data_source: types.EvaluationRunDataSourceOrDict,
        evaluation_config: types.EvaluationRunConfigOrDict,
        labels: Optional[dict[str, str]] = None,
        inference_configs: Optional[
            dict[str, types.EvaluationRunInferenceConfigOrDict]
        ] = None,
        config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
    ) -> types.EvaluationRun:
        """
        Creates an EvaluationRun.
        """

        parameter_model = types._CreateEvaluationRunParameters(
            name=name,
            display_name=display_name,
            data_source=data_source,
            evaluation_config=evaluation_config,
            labels=labels,
            inference_configs=inference_configs,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _CreateEvaluationRunParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationRuns".format_map(request_url_dict)
            else:
                path = "evaluationRuns"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("post", path, request_dict, http_options)

        response_dict = {} if not response.body else json.loads(response.body)

        if self._api_client.vertexai:
            response_dict = _EvaluationRun_from_vertex(response_dict)

        return_value = types.EvaluationRun._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    def _create_evaluation_set(
        self,
        *,
        evaluation_items: list[str],
        display_name: Optional[str] = None,
        config: Optional[types.CreateEvaluationSetConfigOrDict] = None,
    ) -> types.EvaluationSet:
        """
        Creates an EvaluationSet.
        """

        parameter_model = types._CreateEvaluationSetParameters(
            evaluation_items=evaluation_items,
            display_name=display_name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _CreateEvaluationSetParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationSets".format_map(request_url_dict)
            else:
                path = "evaluationSets"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("post", path, request_dict, http_options)

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluationSet._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    def _evaluate_instances(
        self,
        *,
        bleu_input: Optional[types.BleuInputOrDict] = None,
        exact_match_input: Optional[types.ExactMatchInputOrDict] = None,
        rouge_input: Optional[types.RougeInputOrDict] = None,
        pointwise_metric_input: Optional[types.PointwiseMetricInputOrDict] = None,
        pairwise_metric_input: Optional[types.PairwiseMetricInputOrDict] = None,
        tool_call_valid_input: Optional[types.ToolCallValidInputOrDict] = None,
        tool_name_match_input: Optional[types.ToolNameMatchInputOrDict] = None,
        tool_parameter_key_match_input: Optional[
            types.ToolParameterKeyMatchInputOrDict
        ] = None,
        tool_parameter_kv_match_input: Optional[
            types.ToolParameterKVMatchInputOrDict
        ] = None,
        rubric_based_metric_input: Optional[types.RubricBasedMetricInputOrDict] = None,
        autorater_config: Optional[genai_types.AutoraterConfigOrDict] = None,
        metrics: Optional[list[types.MetricOrDict]] = None,
        instance: Optional[types.EvaluationInstanceOrDict] = None,
        config: Optional[types.EvaluateInstancesConfigOrDict] = None,
    ) -> types.EvaluateInstancesResponse:
        """
        Evaluates instances based on a given metric.
        """

        parameter_model = types._EvaluateInstancesRequestParameters(
            bleu_input=bleu_input,
            exact_match_input=exact_match_input,
            rouge_input=rouge_input,
            pointwise_metric_input=pointwise_metric_input,
            pairwise_metric_input=pairwise_metric_input,
            tool_call_valid_input=tool_call_valid_input,
            tool_name_match_input=tool_name_match_input,
            tool_parameter_key_match_input=tool_parameter_key_match_input,
            tool_parameter_kv_match_input=tool_parameter_kv_match_input,
            rubric_based_metric_input=rubric_based_metric_input,
            autorater_config=autorater_config,
            metrics=metrics,
            instance=instance,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _EvaluateInstancesRequestParameters_to_vertex(
                parameter_model
            )
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = ":evaluateInstances".format_map(request_url_dict)
            else:
                path = ":evaluateInstances"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("post", path, request_dict, http_options)

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluateInstancesResponse._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    def _generate_rubrics(
        self,
        *,
        contents: list[genai_types.ContentOrDict],
        predefined_rubric_generation_spec: Optional[
            types.PredefinedMetricSpecOrDict
        ] = None,
        rubric_generation_spec: Optional[types.RubricGenerationSpecOrDict] = None,
        config: Optional[types.RubricGenerationConfigOrDict] = None,
    ) -> types.GenerateInstanceRubricsResponse:
        """
        Generates rubrics for a given prompt.
        """

        parameter_model = types._GenerateInstanceRubricsRequest(
            contents=contents,
            predefined_rubric_generation_spec=predefined_rubric_generation_spec,
            rubric_generation_spec=rubric_generation_spec,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _GenerateInstanceRubricsRequest_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = ":generateInstanceRubrics".format_map(request_url_dict)
            else:
                path = ":generateInstanceRubrics"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("post", path, request_dict, http_options)

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.GenerateInstanceRubricsResponse._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    def _get_evaluation_run(
        self, *, name: str, config: Optional[types.GetEvaluationRunConfigOrDict] = None
    ) -> types.EvaluationRun:
        """
        Retrieves an EvaluationRun from the resource name.
        """

        parameter_model = types._GetEvaluationRunParameters(
            name=name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _GetEvaluationRunParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationRuns/{name}".format_map(request_url_dict)
            else:
                path = "evaluationRuns/{name}"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("get", path, request_dict, http_options)

        response_dict = {} if not response.body else json.loads(response.body)

        if self._api_client.vertexai:
            response_dict = _EvaluationRun_from_vertex(response_dict)

        return_value = types.EvaluationRun._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    def _get_evaluation_set(
        self, *, name: str, config: Optional[types.GetEvaluationSetConfigOrDict] = None
    ) -> types.EvaluationSet:
        """
        Retrieves an EvaluationSet from the resource name.
        """

        parameter_model = types._GetEvaluationSetParameters(
            name=name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _GetEvaluationSetParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationSets/{name}".format_map(request_url_dict)
            else:
                path = "evaluationSets/{name}"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("get", path, request_dict, http_options)

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluationSet._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    def _get_evaluation_item(
        self, *, name: str, config: Optional[types.GetEvaluationItemConfigOrDict] = None
    ) -> types.EvaluationItem:
        """
        Retrieves an EvaluationItem from the resource name.
        """

        parameter_model = types._GetEvaluationItemParameters(
            name=name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _GetEvaluationItemParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationItems/{name}".format_map(request_url_dict)
            else:
                path = "evaluationItems/{name}"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("get", path, request_dict, http_options)

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluationItem._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    def evaluate_instances(
        self,
        *,
        metric_config: types._EvaluateInstancesRequestParameters,
    ) -> types.EvaluateInstancesResponse:
        """Evaluates an instance of a model."""

        if isinstance(metric_config, types._EvaluateInstancesRequestParameters):
            metric_config = metric_config.model_dump()  # type: ignore[assignment]
        else:
            metric_config = dict(metric_config)

        return self._evaluate_instances(
            **metric_config,
        )

    def run_inference(
        self,
        *,
        src: Union[str, pd.DataFrame, types.EvaluationDataset],
        model: Optional[Union[str, Callable[[Any], Any]]] = None,
        agent: Optional[Union[str, types.AgentEngine, LlmAgent]] = None,
        location: Optional[str] = None,
        config: Optional[types.EvalRunInferenceConfigOrDict] = None,
    ) -> types.EvaluationDataset:
        """Runs inference on a dataset for evaluation.

        Args:
          src: The source of the dataset. Can be a string (path to a local file,
                a GCS path, or a BigQuery table), a Pandas DataFrame, or an
                EvaluationDataset object. If an EvaluationDataset is provided,
                it must have `eval_dataset_df` populated.
          model: The model to use for inference. Optional for agent
                evaluations. This field is experimental and may change in
                future versions.
              - For Google Gemini models, provide the model name string (e.g., "gemini-2.5-flash").
              - For third-party models via LiteLLM, use the format "provider/model_name"
                (e.g., "openai/gpt-4o"). Ensure the necessary API key (e.g., OPENAI_API_KEY)
                is set as an environment variable.
              - For custom logic, provide a callable function that accepts a prompt and
                returns a response.
          agent: The agent to run inference with. Optional for non-agent
                evaluations. This field is experimental and may change in
                future versions. Can be one of:
              - An agent engine resource name string, in the format
                `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`;
                `run_inference` will fetch the agent engine from the resource name.
              - A `types.AgentEngine` object.
              - An ADK agent of type `LlmAgent`.
          location: The location to use for the inference. If not specified, the
                location configured in the client will be used. If specified,
                this will override the location set in `vertexai.Client` only
                for this API call.
          config: The optional configuration for the inference run. Must be a dict or
              `types.EvalRunInferenceConfig` type.
                - dest: The destination path for storage of the inference results.
                - prompt_template: The template string to use for constructing prompts.
                - generate_content_config: The config for the Gemini generate content call.

        Returns:
          The evaluation dataset.
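
        Example (an illustrative sketch, not a guaranteed recipe; assumes a
        configured `vertexai.Client` named `client` and access to the
        referenced Gemini model):

          import pandas as pd

          prompts_df = pd.DataFrame({"prompt": ["Why is the sky blue?"]})
          eval_dataset = client.evals.run_inference(
              src=prompts_df,
              model="gemini-2.5-flash",
              config={"dest": "gs://my-bucket/inference-results"},
          )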
        """
        if not config:
            config = types.EvalRunInferenceConfig()
        if isinstance(config, dict):
            config = types.EvalRunInferenceConfig.model_validate(config)

        if isinstance(src, types.EvaluationDataset):
            if src.eval_dataset_df is None:
                raise ValueError(
                    "EvaluationDataset must have eval_dataset_df populated."
                )
            src = src.eval_dataset_df

        agent_engine_instance = None
        agent_instance = None
        if agent:
            if isinstance(agent, str) or isinstance(agent, types.AgentEngine):
                agent_engine_instance = agent
            else:
                agent_instance = agent

        return _evals_common._execute_inference(  # type: ignore[no-any-return]
            api_client=self._api_client,
            model=model,
            agent_engine=agent_engine_instance,
            agent=agent_instance,
            src=src,
            dest=config.dest,
            prompt_template=config.prompt_template,
            location=location,
            config=config.generate_content_config,
        )

    def evaluate(
        self,
        *,
        dataset: Union[
            pd.DataFrame,
            types.EvaluationDatasetOrDict,
            list[types.EvaluationDatasetOrDict],
        ],
        metrics: Optional[list[types.MetricOrDict]] = None,
        location: Optional[str] = None,
        config: Optional[types.EvaluateMethodConfigOrDict] = None,
        **kwargs,
    ) -> types.EvaluationResult:
        """Evaluates candidate responses in the provided dataset(s) using the specified metrics.

        Args:
          dataset: The dataset(s) to evaluate. Can be a pandas DataFrame, a single
            `types.EvaluationDataset` or a list of `types.EvaluationDataset`.
          metrics: The list of metrics to use for evaluation.
          location: The location to use for the evaluation service. If not specified,
             the location configured in the client will be used. If specified,
             this will override the location set in `vertexai.Client` only for
             this API call.
          config: Optional configuration for the evaluation. Can be a dictionary or a
            `types.EvaluateMethodConfig` object.
            - dataset_schema: Schema to use for the dataset. If not specified, the
              dataset schema will be inferred from the dataset automatically.
            - dest: Destination path for storing evaluation results.
          **kwargs: Extra arguments to pass to evaluation, such as `agent_info`.

        Returns:
          The evaluation result.
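
        Example (an illustrative sketch; assumes a configured `vertexai.Client`
        named `client` and an `eval_dataset` produced by `run_inference`):

          result = client.evals.evaluate(
              dataset=eval_dataset,
              metrics=[types.Metric(name="general_quality_v1")],
              config={"dest": "gs://my-bucket/eval-results"},
          )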
        """
        if not config:
            config = types.EvaluateMethodConfig()
        if isinstance(config, dict):
            config = types.EvaluateMethodConfig.model_validate(config)

        if isinstance(dataset, pd.DataFrame):
            dataset = types.EvaluationDataset(eval_dataset_df=dataset)

        if isinstance(dataset, list):
            dataset = [
                (
                    types.EvaluationDataset.model_validate(ds_item)
                    if isinstance(ds_item, dict)
                    else ds_item
                )
                for ds_item in dataset
            ]
        else:
            if isinstance(dataset, dict):
                dataset = types.EvaluationDataset.model_validate(dataset)
        if metrics is None:
            metrics = [types.Metric(name="general_quality_v1")]

        # TODO: Replace kwargs with agent_info after the experimental phase.
        if kwargs:
            logger.warning(
                "`kwargs` attribute in `evaluate` method is experimental and may change in future versions."
            )

        return _evals_common._execute_evaluation(
            api_client=self._api_client,
            dataset=dataset,
            metrics=metrics,
            dataset_schema=config.dataset_schema,
            dest=config.dest,
            location=location,
            **kwargs,
        )

    def batch_evaluate(
        self,
        *,
        dataset: types.EvaluationDatasetOrDict,
        metrics: list[types.MetricOrDict],
        dest: str,
        config: Optional[types.EvaluateDatasetConfigOrDict] = None,
    ) -> types.EvaluateDatasetOperation:
        """Evaluates a dataset based on a set of given metrics."""

        resolved_metrics = _evals_common._resolve_metrics(metrics, self._api_client)
        output_config = genai_types.OutputConfig(
            gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
        )
        parameter_model = types.EvaluateDatasetRequestParameters(
            dataset=dataset,
            metrics=resolved_metrics,
            output_config=output_config,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _evals_utils.BatchEvaluateRequestPreparer.EvaluateDatasetRequestParameters_to_vertex(
                parameter_model
            )
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = ":evaluateDataset".format_map(request_url_dict)
            else:
                path = ":evaluateDataset"

        request_dict = _evals_utils.BatchEvaluateRequestPreparer.prepare_metric_payload(
            request_dict, resolved_metrics
        )

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = self._api_client.request("post", path, request_dict, http_options)

        response_dict = "" if not response.body else json.loads(response.body)

        if self._api_client.vertexai:
            response_dict = _evals_utils.BatchEvaluateRequestPreparer.EvaluateDatasetOperation_from_vertex(
                response_dict
            )

        return_value = types.EvaluateDatasetOperation._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )
        self._api_client._verify_response(return_value)

        return return_value

    def generate_rubrics(
        self,
        *,
        src: Union[str, "pd.DataFrame", types.EvaluationDataset],
        rubric_group_name: str,
        prompt_template: Optional[str] = None,
        generator_model_config: Optional["genai_types.AutoraterConfigOrDict"] = None,
        rubric_content_type: Optional["types.RubricContentType"] = None,
        rubric_type_ontology: Optional[list[str]] = None,
        predefined_spec_name: Optional[Union[str, "types.PrebuiltMetric"]] = None,
        metric_spec_parameters: Optional[dict[str, Any]] = None,
        config: Optional[types.RubricGenerationConfigOrDict] = None,
    ) -> types.EvaluationDataset:
        """Generates rubrics for each prompt in the source and adds them as a new column
        structured as a dictionary.

        You can generate rubrics by providing either:
          1. A `predefined_spec_name` to use a Vertex AI backend recipe.
          2. A `prompt_template` along with other configuration parameters
             (`generator_model_config`, `rubric_content_type`, `rubric_type_ontology`)
             for custom rubric generation.

        These two modes are mutually exclusive.

        Args:
            src: The source of the prompts. Can be a string (path to a local
              file, a GCS path, or a BigQuery table), a Pandas DataFrame, or
              an EvaluationDataset object. The loaded data must contain either
              a 'prompt' column (for text) or a 'request' column (for text or
              multimodal Gemini Content).
            rubric_group_name: Name for the key within the dictionary in the new
              column.
            prompt_template: Optional. Template for the rubric generation prompt. Used for
              custom rubric generation. Mutually exclusive with `predefined_spec_name`.
              If using a 'prompt' column, use {prompt} as the placeholder. If using a
              'request' column, this template is passed to the service along
              with the content.
            generator_model_config: Optional. Configuration for the model used
              in custom rubric generation. Only used if `prompt_template` is provided.
              e.g., {"autorater_model": "gemini-2.5-flash"}.
            rubric_content_type: Optional. The type of rubric content to be
              generated. Only used if `prompt_template` is provided.
            rubric_type_ontology: Optional. A pre-defined list of allowed types
              for generated rubrics. Only used if `prompt_template` is provided.
            predefined_spec_name: Optional. The name of a Predefined Metric to use
                for rubric generation (e.g., "general_quality_v1") or a types.PrebuiltMetric object.
                Mutually exclusive with `prompt_template` and its related parameters.
            metric_spec_parameters: Optional. Parameters for the Predefined Metric,
                used to customize rubric generation. Only used if `predefined_spec_name` is set.
                Example: {"guidelines": ["The response must be in Japanese."]}
            config: Optional. Configuration for the rubric generation process.

        Returns:
            An `EvaluationDataset` with an added column named `rubric_groups` in its
            `eval_dataset_df`. Each cell in this column contains a dictionary like:
            {rubric_group_name: list[Rubric]}.
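
        Example (an illustrative sketch; assumes a configured `vertexai.Client`
        named `client`):

          import pandas as pd

          prompts_df = pd.DataFrame({"prompt": ["Summarize the plot of Hamlet."]})
          rubric_dataset = client.evals.generate_rubrics(
              src=prompts_df,
              rubric_group_name="quality_rubrics",
              predefined_spec_name="general_quality_v1",
          )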
        """
        if isinstance(src, types.EvaluationDataset):
            if src.eval_dataset_df is None:
                raise ValueError(
                    "EvaluationDataset must have eval_dataset_df populated."
                )
            prompts_df = src.eval_dataset_df
        elif isinstance(src, (str, pd.DataFrame)):
            try:
                prompts_df = _evals_common._load_dataframe(self._api_client, src)
            except Exception as e:
                raise ValueError(
                    f"Failed to load prompt dataset from source: {src}. Error: {e}"
                ) from e
        else:
            raise TypeError(
                "Unsupported type for src. Must be str, pd.DataFrame, or types.EvaluationDataset."
            )

        if "prompt" not in prompts_df.columns and "request" not in prompts_df.columns:
            raise ValueError("Loaded dataset must have a 'prompt' or 'request' column.")
        if not rubric_group_name:
            raise ValueError("rubric_group_name cannot be empty.")

        input_column = "request" if "request" in prompts_df.columns else "prompt"
        logger.info(
            "Generating rubrics for %d prompts from column '%s', group: '%s'...",
            len(prompts_df),
            input_column,
            rubric_group_name,
        )
        all_rubric_groups: list[dict[str, list[types.Rubric]]] = []

        rubric_gen_spec = None
        predefined_spec = None

        if predefined_spec_name:
            if prompt_template:
                logger.warning(
                    "prompt_template is ignored when predefined_spec_name is provided."
                )
            if generator_model_config:
                logger.warning(
                    "generator_model_config is ignored when predefined_spec_name is provided."
                )
            if rubric_content_type:
                logger.warning(
                    "rubric_content_type is ignored when predefined_spec_name is provided."
                )
            if rubric_type_ontology:
                logger.warning(
                    "rubric_type_ontology is ignored when predefined_spec_name is provided."
                )

            if isinstance(predefined_spec_name, str):
                actual_predefined_spec_name = predefined_spec_name
            elif hasattr(
                predefined_spec_name, "resolve"
            ):  # Check if it's LazyLoadedPrebuiltMetric
                try:
                    resolved_metric = predefined_spec_name.resolve(self._api_client)
                    actual_predefined_spec_name = resolved_metric.name
                except Exception as e:
                    raise ValueError(f"Failed to resolve PrebuiltMetric: {e}")
            else:
                raise TypeError(
                    "predefined_spec_name must be a string or types.PrebuiltMetric"
                )

            if not actual_predefined_spec_name:
                raise ValueError(
                    "Could not determine metric_spec_name from predefined_spec_name"
                )

            predefined_spec = types.PredefinedMetricSpec(
                metric_spec_name=actual_predefined_spec_name,
                metric_spec_parameters=metric_spec_parameters,
            )
        elif prompt_template:
            if metric_spec_parameters:
                logger.warning(
                    "metric_spec_parameters is ignored when prompt_template is provided."
                )
            spec_dict = {
                "prompt_template": prompt_template,
                "rubric_content_type": rubric_content_type,
                "rubric_type_ontology": rubric_type_ontology,
                "generator_model_config": generator_model_config,
            }
            spec_dict = {k: v for k, v in spec_dict.items() if v is not None}
            rubric_gen_spec = types.RubricGenerationSpec.model_validate(spec_dict)
        else:
            raise ValueError(
                "Either predefined_spec_name or prompt_template must be provided."
            )

        for _, row in prompts_df.iterrows():
            input_data = row[input_column]
            if isinstance(input_data, str):
                contents = [
                    genai_types.Content(parts=[genai_types.Part(text=input_data)])
                ]
            elif isinstance(input_data, list):
                contents = input_data
            else:
                logger.warning(
                    f"Skipping row: Unexpected input format in column '{input_column}'."
                )
                all_rubric_groups.append({rubric_group_name: []})
                continue

            try:
                response = self._generate_rubrics(
                    contents=contents,
                    rubric_generation_spec=rubric_gen_spec,
                    predefined_rubric_generation_spec=predefined_spec,
                    config=config,
                )
                rubric_group = {rubric_group_name: response.generated_rubrics}
                all_rubric_groups.append(rubric_group)
            except Exception as e:
                logger.error(
                    "Rubric generation failed for input: %s... Error: %s",
                    str(input_data)[:50],
                    e,
                    exc_info=True,
                )
                all_rubric_groups.append({rubric_group_name: []})

        prompts_with_rubrics = prompts_df.copy()
        prompts_with_rubrics["rubric_groups"] = all_rubric_groups
        logger.info(
            f"Rubric generation complete. Added column 'rubric_groups' with key '{rubric_group_name}'."
        )
        return types.EvaluationDataset(eval_dataset_df=prompts_with_rubrics)

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.get_evaluation_run module is experimental, "
        "and may change in future versions."
    )
    def get_evaluation_run(
        self,
        *,
        name: str,
        include_evaluation_items: bool = False,
        config: Optional[types.GetEvaluationRunConfigOrDict] = None,
    ) -> types.EvaluationRun:
        """Retrieves an EvaluationRun from the resource name.
        Args:
          name: The resource name of the EvaluationRun. Format:
            `projects/{project}/locations/{location}/evaluationRuns/{evaluation_run}`
          include_evaluation_items: Whether to include the evaluation items in the
            response.
          config: The optional configuration for the evaluation run. Must be a
              dict or a `types.GetEvaluationRunConfig` object.

        Returns:
          The evaluation run.

        Raises:
          ValueError: If the name is empty or invalid.
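
        Example (an illustrative sketch; assumes a configured `vertexai.Client`
        named `client` and an existing evaluation run):

          eval_run = client.evals.get_evaluation_run(
              name="projects/my-project/locations/us-central1/evaluationRuns/123",
              include_evaluation_items=True,
          )
          print(eval_run.state)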
        """
        if not name:
            raise ValueError("name cannot be empty.")
        if name.startswith("projects/"):
            name = name.split("/")[-1]
        result = self._get_evaluation_run(name=name, config=config)
        if include_evaluation_items:
            result.evaluation_item_results = (
                _evals_common._convert_evaluation_run_results(
                    self._api_client,
                    result.evaluation_run_results,
                    result.inference_configs,
                )
            )
        return result

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.create_evaluation_run module is experimental, "
        "and may change in future versions."
    )
    def create_evaluation_run(
        self,
        *,
        dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
        dest: str,
        metrics: list[types.EvaluationRunMetricOrDict],
        name: Optional[str] = None,
        display_name: Optional[str] = None,
        agent_info: Optional[types.evals.AgentInfoOrDict] = None,
        labels: Optional[dict[str, str]] = None,
        config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
    ) -> types.EvaluationRun:
        """Creates an EvaluationRun.

        Args:
          dataset: The dataset to evaluate. Either an EvaluationRunDataSource or an EvaluationDataset.
          dest: The GCS URI prefix to write the evaluation results to.
          metrics: The list of metrics to evaluate.
          name: The name of the evaluation run.
          display_name: The display name of the evaluation run.
          agent_info: The agent info to evaluate.
          labels: The labels to apply to the evaluation run.
          config: The configuration for the evaluation run.

        Returns:
            The created evaluation run.
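
        Example (an illustrative sketch; assumes a configured `vertexai.Client`
        named `client`, an `eval_dataset` with `eval_dataset_df` populated, and
        a pre-built `run_metrics` list of `types.EvaluationRunMetric` values):

          eval_run = client.evals.create_evaluation_run(
              dataset=eval_dataset,
              dest="gs://my-bucket/eval-run-results",
              metrics=run_metrics,
              display_name="my-eval-run",
          )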
        """
        if agent_info and isinstance(agent_info, dict):
            agent_info = types.evals.AgentInfo.model_validate(agent_info)
        if type(dataset).__name__ == "EvaluationDataset":
            if dataset.eval_dataset_df is None:
                raise ValueError(
                    "EvaluationDataset must have eval_dataset_df populated."
                )
            if (
                dataset.candidate_name
                and agent_info
                and agent_info.name
                and dataset.candidate_name != agent_info.name
            ):
                logger.warning(
                    "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
                )
            elif dataset.candidate_name is None and agent_info:
                dataset.candidate_name = agent_info.name
            eval_set = _evals_common._create_evaluation_set_from_dataframe(
                self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
            )
            dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
        output_config = genai_types.OutputConfig(
            gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
        )
        resolved_metrics = _evals_common._resolve_evaluation_run_metrics(
            metrics, self._api_client
        )
        evaluation_config = types.EvaluationRunConfig(
            output_config=output_config, metrics=resolved_metrics
        )
        inference_configs = {}
        if agent_info:
            inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
                agent_config=types.EvaluationRunAgentConfig(
                    developer_instruction=genai_types.Content(
                        parts=[genai_types.Part(text=agent_info.instruction)]
                    ),
                    tools=agent_info.tool_declarations,
                )
            )
            if agent_info.agent_resource_name:
                labels = labels or {}
                labels["vertex-ai-evaluation-agent-engine-id"] = (
                    agent_info.agent_resource_name.split("reasoningEngines/")[-1]
                )
        if not name:
            name = f"evaluation_run_{uuid.uuid4()}"

        return self._create_evaluation_run(  # type: ignore[no-any-return]
            name=name,
            display_name=display_name or name,
            data_source=dataset,
            evaluation_config=evaluation_config,
            inference_configs=inference_configs,
            labels=labels,
            config=config,
        )

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.get_evaluation_set method is experimental, "
        "and may change in future versions."
    )
    def get_evaluation_set(
        self,
        *,
        name: str,
        config: Optional[types.GetEvaluationSetConfigOrDict] = None,
    ) -> types.EvaluationSet:
        """Retrieves an EvaluationSet from the resource name.

        Args:
          name: The resource name of the EvaluationSet. Format:
            `projects/{project}/locations/{location}/evaluationSets/{evaluation_set}`
          config: The optional configuration for the evaluation set. Must be a dict or
              `types.GetEvaluationSetConfigOrDict` type.

        Returns:
          The evaluation set.
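
        Example (illustrative usage, assuming a client instance named `client`;
        the resource name is a placeholder):

          evaluation_set = client.evals.get_evaluation_set(
              name="projects/my-project/locations/us-central1/evaluationSets/12345"
          )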
        """

        if not name:
            raise ValueError("name cannot be empty.")
        if name.startswith("projects/"):
            name = name.split("/")[-1]
        return self._get_evaluation_set(name=name, config=config)

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.get_evaluation_item method is experimental, "
        "and may change in future versions."
    )
    def get_evaluation_item(
        self,
        *,
        name: str,
        config: Optional[types.GetEvaluationItemConfigOrDict] = None,
    ) -> types.EvaluationItem:
        """Retrieves an EvaluationItem from the resource name.

        Args:
          name: The resource name of the EvaluationItem. Format:
            `projects/{project}/locations/{location}/evaluationItems/{evaluation_item}`
          config: The optional configuration for the evaluation item. Must be a dict or
              `types.GetEvaluationItemConfigOrDict` type.

        Returns:
          The evaluation item.
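
        Example (illustrative usage, assuming a client instance named `client`;
        the resource name is a placeholder):

          evaluation_item = client.evals.get_evaluation_item(
              name="projects/my-project/locations/us-central1/evaluationItems/12345"
          )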
        """
        if not name:
            raise ValueError("name cannot be empty.")
        if name.startswith("projects/"):
            name = name.split("/")[-1]
        result = self._get_evaluation_item(name=name, config=config)
        if (
            result.gcs_uri
            and result.evaluation_item_type == types.EvaluationItemType.RESULT
        ):
            result.evaluation_response = (
                _evals_common._convert_gcs_to_evaluation_item_result(
                    self._api_client, result.gcs_uri
                )
            )
        elif (
            result.gcs_uri
            and result.evaluation_item_type == types.EvaluationItemType.REQUEST
        ):
            result.evaluation_request = (
                _evals_common._convert_gcs_to_evaluation_item_request(
                    self._api_client, result.gcs_uri
                )
            )
        return result

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.create_evaluation_item module is experimental, "
        "and may change in future versions."
    )
    def create_evaluation_item(
        self,
        *,
        evaluation_item_type: types.EvaluationItemType,
        gcs_uri: str,
        display_name: Optional[str] = None,
        config: Optional[types.CreateEvaluationItemConfigOrDict] = None,
    ) -> types.EvaluationItem:
        """Creates an EvaluationItem.

        Args:
          evaluation_item_type: The type of the evaluation item.
          gcs_uri: The GCS URI of the evaluation item.
          display_name: The display name of the evaluation item.
          config: The optional configuration for the evaluation item. Must be a dict or
              `types.CreateEvaluationItemConfigOrDict` type.

        Returns:
          The evaluation item.
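
        Example (illustrative usage, assuming a client instance named `client`;
        the GCS URI and display name are placeholders):

          evaluation_item = client.evals.create_evaluation_item(
              evaluation_item_type=types.EvaluationItemType.REQUEST,
              gcs_uri="gs://my-bucket/eval-items/request.json",
              display_name="my-evaluation-item",
          )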
        """
        return self._create_evaluation_item(  # type: ignore[no-any-return]
            evaluation_item_type=evaluation_item_type,
            gcs_uri=gcs_uri,
            display_name=display_name,
            config=config,
        )

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.create_evaluation_set module is experimental, "
        "and may change in future versions."
    )
    def create_evaluation_set(
        self,
        *,
        evaluation_items: list[str],
        display_name: Optional[str] = None,
        config: Optional[types.CreateEvaluationSetConfigOrDict] = None,
    ) -> types.EvaluationSet:
        """Creates an EvaluationSet.

        Args:
          evaluation_items: The list of evaluation item names. Format:
            `projects/{project}/locations/{location}/evaluationItems/{evaluation_item}`
          display_name: The display name of the evaluation set.
          config: The optional configuration for the evaluation set. Must be a dict or
              `types.CreateEvaluationSetConfigOrDict` type.

        Returns:
          The evaluation set.
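
        Example (illustrative usage, assuming a client instance named `client`;
        the evaluation item resource name is a placeholder):

          evaluation_set = client.evals.create_evaluation_set(
              evaluation_items=[
                  "projects/my-project/locations/us-central1/evaluationItems/12345"
              ],
              display_name="my-evaluation-set",
          )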
        """
        return self._create_evaluation_set(  # type: ignore[no-any-return]
            evaluation_items=evaluation_items,
            display_name=display_name,
            config=config,
        )


class AsyncEvals(_api_module.BaseModule):

    async def _create_evaluation_item(
        self,
        *,
        evaluation_item_type: str,
        gcs_uri: str,
        display_name: Optional[str] = None,
        config: Optional[types.CreateEvaluationItemConfigOrDict] = None,
    ) -> types.EvaluationItem:
        """
        Creates an EvaluationItem.
        """

        parameter_model = types._CreateEvaluationItemParameters(
            evaluation_item_type=evaluation_item_type,
            gcs_uri=gcs_uri,
            display_name=display_name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _CreateEvaluationItemParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationItems".format_map(request_url_dict)
            else:
                path = "evaluationItems"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "post", path, request_dict, http_options
        )

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluationItem._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    async def _create_evaluation_run(
        self,
        *,
        name: Optional[str] = None,
        display_name: Optional[str] = None,
        data_source: types.EvaluationRunDataSourceOrDict,
        evaluation_config: types.EvaluationRunConfigOrDict,
        labels: Optional[dict[str, str]] = None,
        inference_configs: Optional[
            dict[str, types.EvaluationRunInferenceConfigOrDict]
        ] = None,
        config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
    ) -> types.EvaluationRun:
        """
        Creates an EvaluationRun.
        """

        parameter_model = types._CreateEvaluationRunParameters(
            name=name,
            display_name=display_name,
            data_source=data_source,
            evaluation_config=evaluation_config,
            labels=labels,
            inference_configs=inference_configs,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _CreateEvaluationRunParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationRuns".format_map(request_url_dict)
            else:
                path = "evaluationRuns"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "post", path, request_dict, http_options
        )

        response_dict = {} if not response.body else json.loads(response.body)

        if self._api_client.vertexai:
            response_dict = _EvaluationRun_from_vertex(response_dict)

        return_value = types.EvaluationRun._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    async def _create_evaluation_set(
        self,
        *,
        evaluation_items: list[str],
        display_name: Optional[str] = None,
        config: Optional[types.CreateEvaluationSetConfigOrDict] = None,
    ) -> types.EvaluationSet:
        """
        Creates an EvaluationSet.
        """

        parameter_model = types._CreateEvaluationSetParameters(
            evaluation_items=evaluation_items,
            display_name=display_name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _CreateEvaluationSetParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationSets".format_map(request_url_dict)
            else:
                path = "evaluationSets"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "post", path, request_dict, http_options
        )

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluationSet._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    async def _evaluate_instances(
        self,
        *,
        bleu_input: Optional[types.BleuInputOrDict] = None,
        exact_match_input: Optional[types.ExactMatchInputOrDict] = None,
        rouge_input: Optional[types.RougeInputOrDict] = None,
        pointwise_metric_input: Optional[types.PointwiseMetricInputOrDict] = None,
        pairwise_metric_input: Optional[types.PairwiseMetricInputOrDict] = None,
        tool_call_valid_input: Optional[types.ToolCallValidInputOrDict] = None,
        tool_name_match_input: Optional[types.ToolNameMatchInputOrDict] = None,
        tool_parameter_key_match_input: Optional[
            types.ToolParameterKeyMatchInputOrDict
        ] = None,
        tool_parameter_kv_match_input: Optional[
            types.ToolParameterKVMatchInputOrDict
        ] = None,
        rubric_based_metric_input: Optional[types.RubricBasedMetricInputOrDict] = None,
        autorater_config: Optional[genai_types.AutoraterConfigOrDict] = None,
        metrics: Optional[list[types.MetricOrDict]] = None,
        instance: Optional[types.EvaluationInstanceOrDict] = None,
        config: Optional[types.EvaluateInstancesConfigOrDict] = None,
    ) -> types.EvaluateInstancesResponse:
        """
        Evaluates instances based on a given metric.
        """

        parameter_model = types._EvaluateInstancesRequestParameters(
            bleu_input=bleu_input,
            exact_match_input=exact_match_input,
            rouge_input=rouge_input,
            pointwise_metric_input=pointwise_metric_input,
            pairwise_metric_input=pairwise_metric_input,
            tool_call_valid_input=tool_call_valid_input,
            tool_name_match_input=tool_name_match_input,
            tool_parameter_key_match_input=tool_parameter_key_match_input,
            tool_parameter_kv_match_input=tool_parameter_kv_match_input,
            rubric_based_metric_input=rubric_based_metric_input,
            autorater_config=autorater_config,
            metrics=metrics,
            instance=instance,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _EvaluateInstancesRequestParameters_to_vertex(
                parameter_model
            )
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = ":evaluateInstances".format_map(request_url_dict)
            else:
                path = ":evaluateInstances"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "post", path, request_dict, http_options
        )

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluateInstancesResponse._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    async def _generate_rubrics(
        self,
        *,
        contents: list[genai_types.ContentOrDict],
        predefined_rubric_generation_spec: Optional[
            types.PredefinedMetricSpecOrDict
        ] = None,
        rubric_generation_spec: Optional[types.RubricGenerationSpecOrDict] = None,
        config: Optional[types.RubricGenerationConfigOrDict] = None,
    ) -> types.GenerateInstanceRubricsResponse:
        """
        Generates rubrics for a given prompt.
        """

        parameter_model = types._GenerateInstanceRubricsRequest(
            contents=contents,
            predefined_rubric_generation_spec=predefined_rubric_generation_spec,
            rubric_generation_spec=rubric_generation_spec,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _GenerateInstanceRubricsRequest_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = ":generateInstanceRubrics".format_map(request_url_dict)
            else:
                path = ":generateInstanceRubrics"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "post", path, request_dict, http_options
        )

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.GenerateInstanceRubricsResponse._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    async def _get_evaluation_run(
        self, *, name: str, config: Optional[types.GetEvaluationRunConfigOrDict] = None
    ) -> types.EvaluationRun:
        """
        Retrieves an EvaluationRun from the resource name.
        """

        parameter_model = types._GetEvaluationRunParameters(
            name=name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _GetEvaluationRunParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationRuns/{name}".format_map(request_url_dict)
            else:
                path = "evaluationRuns/{name}"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "get", path, request_dict, http_options
        )

        response_dict = {} if not response.body else json.loads(response.body)

        if self._api_client.vertexai:
            response_dict = _EvaluationRun_from_vertex(response_dict)

        return_value = types.EvaluationRun._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    async def _get_evaluation_set(
        self, *, name: str, config: Optional[types.GetEvaluationSetConfigOrDict] = None
    ) -> types.EvaluationSet:
        """
        Retrieves an EvaluationSet from the resource name.
        """

        parameter_model = types._GetEvaluationSetParameters(
            name=name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _GetEvaluationSetParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationSets/{name}".format_map(request_url_dict)
            else:
                path = "evaluationSets/{name}"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "get", path, request_dict, http_options
        )

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluationSet._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    async def _get_evaluation_item(
        self, *, name: str, config: Optional[types.GetEvaluationItemConfigOrDict] = None
    ) -> types.EvaluationItem:
        """
        Retrieves an EvaluationItem from the resource name.
        """

        parameter_model = types._GetEvaluationItemParameters(
            name=name,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _GetEvaluationItemParameters_to_vertex(parameter_model)
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = "evaluationItems/{name}".format_map(request_url_dict)
            else:
                path = "evaluationItems/{name}"

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "get", path, request_dict, http_options
        )

        response_dict = {} if not response.body else json.loads(response.body)

        return_value = types.EvaluationItem._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )

        self._api_client._verify_response(return_value)
        return return_value

    async def batch_evaluate(
        self,
        *,
        dataset: types.EvaluationDatasetOrDict,
        metrics: list[types.MetricOrDict],
        dest: str,
        config: Optional[types.EvaluateDatasetConfigOrDict] = None,
    ) -> types.EvaluateDatasetOperation:
        """Evaluates a dataset based on a set of given metrics."""
        resolved_metrics = _evals_common._resolve_metrics(metrics, self._api_client)
        output_config = genai_types.OutputConfig(
            gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
        )
        parameter_model = types.EvaluateDatasetRequestParameters(
            dataset=dataset,
            metrics=resolved_metrics,
            output_config=output_config,
            config=config,
        )

        request_url_dict: Optional[dict[str, str]]
        if not self._api_client.vertexai:
            raise ValueError("This method is only supported in the Vertex AI client.")
        else:
            request_dict = _evals_utils.BatchEvaluateRequestPreparer.EvaluateDatasetRequestParameters_to_vertex(
                parameter_model
            )
            request_url_dict = request_dict.get("_url")
            if request_url_dict:
                path = ":evaluateDataset".format_map(request_url_dict)
            else:
                path = ":evaluateDataset"

        request_dict = _evals_utils.BatchEvaluateRequestPreparer.prepare_metric_payload(
            request_dict, resolved_metrics
        )

        query_params = request_dict.get("_query")
        if query_params:
            path = f"{path}?{urlencode(query_params)}"
        # TODO: remove the hack that pops config.
        request_dict.pop("config", None)

        http_options: Optional[types.HttpOptions] = None
        if (
            parameter_model.config is not None
            and parameter_model.config.http_options is not None
        ):
            http_options = parameter_model.config.http_options

        request_dict = _common.convert_to_dict(request_dict)
        request_dict = _common.encode_unserializable_types(request_dict)

        response = await self._api_client.async_request(
            "post", path, request_dict, http_options
        )

        response_dict = "" if not response.body else json.loads(response.body)

        if self._api_client.vertexai:
            response_dict = _evals_utils.BatchEvaluateRequestPreparer.EvaluateDatasetOperation_from_vertex(
                response_dict
            )

        return_value = types.EvaluateDatasetOperation._from_response(
            response=response_dict, kwargs=parameter_model.model_dump()
        )
        self._api_client._verify_response(return_value)

        return return_value

    async def evaluate_instances(
        self,
        *,
        metric_config: types._EvaluateInstancesRequestParameters,
    ) -> types.EvaluateInstancesResponse:
        """Evaluates an instance of a model."""

        if isinstance(metric_config, types._EvaluateInstancesRequestParameters):
            metric_config = metric_config.model_dump()  # type: ignore[assignment]
        else:
            metric_config = dict(metric_config)

        result = await self._evaluate_instances(
            **metric_config,
        )

        return result

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.get_evaluation_run module is experimental, "
        "and may change in future versions."
    )
    async def get_evaluation_run(
        self,
        *,
        name: str,
        include_evaluation_items: bool = False,
        config: Optional[types.GetEvaluationRunConfigOrDict] = None,
    ) -> types.EvaluationRun:
        """Retrieves the EvaluationRun from the resource name.
        Args:
          name: The resource name of the EvaluationRun. Format:
            `projects/{project}/locations/{location}/evaluationRuns/{evaluation_run}`
          include_evaluation_items: Whether to include the evaluation items in the
            response.
          config: The optional configuration for the evaluation run. Must be a dict or
              `types.GetEvaluationRunConfigOrDict` type.

        Returns:
          The evaluation run.
        Raises:
          ValueError: If the name is empty or invalid.
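
        Example (illustrative usage, assuming the async client surface, e.g.
        `client.aio.evals`; the resource name is a placeholder):

          evaluation_run = await client.aio.evals.get_evaluation_run(
              name="projects/my-project/locations/us-central1/evaluationRuns/12345",
              include_evaluation_items=True,
          )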
        """
        if not name:
            raise ValueError("name cannot be empty.")
        if name.startswith("projects/"):
            name = name.split("/")[-1]
        result = await self._get_evaluation_run(name=name, config=config)
        if include_evaluation_items:
            result.evaluation_item_results = (
                await _evals_common._convert_evaluation_run_results_async(
                    self._api_client,
                    result.evaluation_run_results,
                    result.inference_configs,
                )
            )

        return result

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.create_evaluation_run module is experimental, "
        "and may change in future versions."
    )
    async def create_evaluation_run(
        self,
        *,
        dataset: Union[types.EvaluationRunDataSource, types.EvaluationDataset],
        dest: str,
        metrics: list[types.EvaluationRunMetricOrDict],
        name: Optional[str] = None,
        display_name: Optional[str] = None,
        agent_info: Optional[types.evals.AgentInfo] = None,
        labels: Optional[dict[str, str]] = None,
        config: Optional[types.CreateEvaluationRunConfigOrDict] = None,
    ) -> types.EvaluationRun:
        """Creates an EvaluationRun.

        Args:
          dataset: The dataset to evaluate. Either an EvaluationRunDataSource or an EvaluationDataset.
          dest: The GCS URI prefix to write the evaluation results to.
          metrics: The list of metrics to evaluate.
          name: The name of the evaluation run.
          display_name: The display name of the evaluation run.
          agent_info: The agent info to evaluate.
          labels: The labels to apply to the evaluation run.
          config: The configuration for the evaluation run.

        Returns:
            The created evaluation run.
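
        Example (illustrative usage, assuming the async client surface, e.g.
        `client.aio.evals`; the resource names and bucket below are placeholders):

          evaluation_run = await client.aio.evals.create_evaluation_run(
              dataset=types.EvaluationRunDataSource(
                  evaluation_set="projects/my-project/locations/us-central1/evaluationSets/12345"
              ),
              dest="gs://my-bucket/eval-results",
              metrics=[...],  # One or more EvaluationRunMetric (or dict) values.
          )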
        """
        if agent_info and isinstance(agent_info, dict):
            agent_info = types.evals.AgentInfo.model_validate(agent_info)
        if type(dataset).__name__ == "EvaluationDataset":
            if dataset.eval_dataset_df is None:
                raise ValueError(
                    "EvaluationDataset must have eval_dataset_df populated."
                )
            if (
                dataset.candidate_name
                and agent_info
                and agent_info.name
                and dataset.candidate_name != agent_info.name
            ):
                logger.warning(
                    "Evaluation dataset candidate_name and agent_info.name are different. Please make sure this is intended."
                )
            elif dataset.candidate_name is None and agent_info:
                dataset.candidate_name = agent_info.name
            eval_set = _evals_common._create_evaluation_set_from_dataframe(
                self._api_client, dest, dataset.eval_dataset_df, dataset.candidate_name
            )
            dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name)
        output_config = genai_types.OutputConfig(
            gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest)
        )
        resolved_metrics = _evals_common._resolve_evaluation_run_metrics(
            metrics, self._api_client
        )
        evaluation_config = types.EvaluationRunConfig(
            output_config=output_config, metrics=resolved_metrics
        )
        inference_configs = {}
        if agent_info:
            inference_configs[agent_info.name] = types.EvaluationRunInferenceConfig(
                agent_config=types.EvaluationRunAgentConfig(
                    developer_instruction=genai_types.Content(
                        parts=[genai_types.Part(text=agent_info.instruction)]
                    ),
                    tools=agent_info.tool_declarations,
                )
            )
            if agent_info.agent_resource_name:
                labels = labels or {}
                labels["vertex-ai-evaluation-agent-engine-id"] = (
                    agent_info.agent_resource_name.split("reasoningEngines/")[-1]
                )
        if not name:
            name = f"evaluation_run_{uuid.uuid4()}"

        result = await self._create_evaluation_run(  # type: ignore[no-any-return]
            name=name,
            display_name=display_name or name,
            data_source=dataset,
            evaluation_config=evaluation_config,
            inference_configs=inference_configs,
            labels=labels,
            config=config,
        )

        return result

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.get_evaluation_set method is experimental, "
        "and may change in future versions."
    )
    async def get_evaluation_set(
        self,
        *,
        name: str,
        config: Optional[types.GetEvaluationSetConfigOrDict] = None,
    ) -> types.EvaluationSet:
        """Retrieves an EvaluationSet from the resource name.

        Args:
          name: The resource name of the EvaluationSet. Format:
            `projects/{project}/locations/{location}/evaluationSets/{evaluation_set}`
          config: The optional configuration for the evaluation set. Must be a dict or
              `types.GetEvaluationSetConfigOrDict` type.

        Returns:
          The evaluation set.
        """
        if not name:
            raise ValueError("name cannot be empty.")
        if name.startswith("projects/"):
            name = name.split("/")[-1]
        result = await self._get_evaluation_set(name=name, config=config)

        return result

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.get_evaluation_item method is experimental, "
        "and may change in future versions."
    )
    async def get_evaluation_item(
        self,
        *,
        name: str,
        config: Optional[types.GetEvaluationItemConfigOrDict] = None,
    ) -> types.EvaluationItem:
        """Retrieves an EvaluationItem from the resource name.

        Args:
          name: The resource name of the EvaluationItem. Format:
            `projects/{project}/locations/{location}/evaluationItems/{evaluation_item}`
          config: The optional configuration for the evaluation item. Must be a dict or
              `types.GetEvaluationItemConfigOrDict` type.

        Returns:
          The evaluation item.
        """
        if not name:
            raise ValueError("name cannot be empty.")
        if name.startswith("projects/"):
            name = name.split("/")[-1]
        result = await self._get_evaluation_item(name=name, config=config)
        if (
            result.gcs_uri
            and result.evaluation_item_type == types.EvaluationItemType.RESULT
        ):
            result.evaluation_response = (
                _evals_common._convert_gcs_to_evaluation_item_result(
                    self._api_client, result.gcs_uri
                )
            )
        elif (
            result.gcs_uri
            and result.evaluation_item_type == types.EvaluationItemType.REQUEST
        ):
            result.evaluation_request = (
                _evals_common._convert_gcs_to_evaluation_item_request(
                    self._api_client, result.gcs_uri
                )
            )

        return result

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.create_evaluation_item module is experimental, "
        "and may change in future versions."
    )
    async def create_evaluation_item(
        self,
        *,
        evaluation_item_type: types.EvaluationItemType,
        gcs_uri: str,
        display_name: Optional[str] = None,
        config: Optional[types.CreateEvaluationItemConfigOrDict] = None,
    ) -> types.EvaluationItem:
        """Creates an EvaluationItem.

        Args:
          evaluation_item_type: The type of the evaluation item.
          gcs_uri: The GCS URI of the evaluation item.
          display_name: The display name of the evaluation item.
          config: The optional configuration for the evaluation item. Must be a dict or
              `types.CreateEvaluationItemConfigOrDict` type.

        Returns:
          The evaluation item.
        """
        result = await self._create_evaluation_item(  # type: ignore[no-any-return]
            evaluation_item_type=evaluation_item_type,
            gcs_uri=gcs_uri,
            display_name=display_name,
            config=config,
        )
        return result

    @_common.experimental_warning(
        "The Vertex SDK GenAI evals.create_evaluation_set module is experimental, "
        "and may change in future versions."
    )
    async def create_evaluation_set(
        self,
        *,
        evaluation_items: list[str],
        display_name: Optional[str] = None,
        config: Optional[types.CreateEvaluationSetConfigOrDict] = None,
    ) -> types.EvaluationSet:
        """Creates an EvaluationSet.

        Args:
          evaluation_items: The list of evaluation item names. Format:
            `projects/{project}/locations/{location}/evaluationItems/{evaluation_item}`
          display_name: The display name of the evaluation set.
          config: The optional configuration for the evaluation set. Must be a dict or
              `types.CreateEvaluationSetConfigOrDict` type.

        Returns:
          The evaluation set.
        """
        result = await self._create_evaluation_set(  # type: ignore[no-any-return]
            evaluation_items=evaluation_items,
            display_name=display_name,
            config=config,
        )
        return result
